{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"../../data/telecom_churn.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Признаки по одному \n",
    "## 1.1. Количественные\n",
    "Гистограмма и боксплот"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Total day minutes\"].hist();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.boxplot(df[\"Total day minutes\"]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.hist();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.2. Категориальные\n",
    "countplot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"State\"].value_counts().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Churn\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.countplot(df[\"Churn\"]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.countplot(df[\"State\"]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.countplot(df[df[\"State\"].isin(df[\"State\"].value_counts().head().index)][\"State\"]);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. Взаимодействия признаков\n",
    "## 2.1. Количественный с количественным\n",
    "pairplot, scatterplot, корреляции, heatmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "feat = [f for f in df.columns if \"charge\" in f]\n",
    "\n",
    "df[feat].hist();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.pairplot(df[feat]);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"Churn\"] = df[\"Churn\"].map({0: False, 1: True})\n",
    "df[\"Churn\"].map({False: \"blue\", True: \"orange\"}).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[~df[\"Churn\"]].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(\n",
    "    df[df[\"Churn\"]][\"Total eve charge\"],\n",
    "    df[df[\"Churn\"]][\"Total intl charge\"],\n",
    "    color=\"orange\",\n",
    "    label=\"churn\",\n",
    ")\n",
    "plt.scatter(\n",
    "    df[~df[\"Churn\"]][\"Total eve charge\"],\n",
    "    df[~df[\"Churn\"]][\"Total intl charge\"],\n",
    "    color=\"blue\",\n",
    "    label=\"loyal\",\n",
    ")\n",
    "plt.xlabel(\"Вечерние начисления\")\n",
    "plt.ylabel(\"Межнар. начисления\")\n",
    "plt.title(\"Распределение начислений для лояльных/ушедших\")\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.heatmap(df.corr());"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop(feat, axis=1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.heatmap(df.corr());"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2.  Количественный с категориальным\n",
    "boxplot, violinplot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.boxplot(x=\"Churn\", y=\"Total day minutes\", data=df);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.boxplot(x=\"State\", y=\"Total day minutes\", data=df);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.violinplot(x=\"Churn\", y=\"Total day minutes\", data=df);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby(\"International plan\")[\"Total day minutes\"].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.boxplot(x=\"International plan\", y=\"Total day minutes\", data=df);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3. Категориальный с категориальным\n",
    "countplot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.crosstab(df[\"Churn\"], df[\"International plan\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.countplot(x=\"International plan\", hue=\"Churn\", data=df);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sns.countplot(x=\"Customer service calls\", hue=\"Churn\", data=df);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Прочее\n",
    "Manifold learning, один из представителей – t-SNE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.manifold import TSNE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tsne = TSNE(random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = df.drop([\"State\", \"Churn\"], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2[\"International plan\"] = df2[\"International plan\"].map({\"Yes\": 1, \"No\": 0})\n",
    "df2[\"Voice mail plan\"] = df2[\"Voice mail plan\"].map({\"Yes\": 1, \"No\": 0})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "tsne.fit(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.scatter(\n",
    "    tsne.embedding_[df[\"Churn\"].values, 0],\n",
    "    tsne.embedding_[df[\"Churn\"].values, 1],\n",
    "    color=\"orange\",\n",
    "    alpha=0.7,\n",
    ")\n",
    "plt.scatter(\n",
    "    tsne.embedding_[~df[\"Churn\"].values, 0],\n",
    "    tsne.embedding_[~df[\"Churn\"].values, 1],\n",
    "    color=\"blue\",\n",
    "    alpha=0.7,\n",
    ");"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
